data <- read.csv2(file.path(PATH,'final_project_stats/Data_Spotify.csv'), sep=';',header = TRUE)
str(data)
'data.frame': 1173 obs. of 24 variables:
$ URI : chr "spotify:track:5Zb6JxSEcjsUNrINcAyAYJ" "spotify:track:7cO2S4l7b9Rtn5MqPHXTJU" "spotify:track:73E08jF1urQQSO0oTzCPpP" "spotify:track:6eBK3edMW7bEzecF1eCezc" ...
$ song : chr "Strangers Like Me - From \"Tarzan\"/Soundtrack Version" "Under The Gun" "No One - Acoustic" "(Everything I Do) I Do It For You" ...
$ album : chr "Disney Summer Songs" "\"Let's Rock\"" "Vault Volume 1" "Waking Up The Neighbours" ...
$ artist : chr "Phil Collins" "The Black Keys" "Alicia Keys" "Bryan Adams" ...
$ release_date : int 2021 2019 2017 1991 1995 2020 2019 2008 1977 2013 ...
$ explicit : chr "False" "False" "False" "False" ...
$ msPlayed : int 3484323 2455772 116924 1429 2286762 171215 1068815 61718 38974 1797847 ...
$ X.markets : int 96 183 184 3 128 183 183 180 171 184 ...
$ popularity : int 64 41 67 65 49 75 51 56 77 44 ...
$ danceability : num 0.588 0.518 0.38 0.526 0.209 0.65 0.512 0.736 0.814 0.578 ...
$ energy : num 0.807 0.689 0.325 0.365 0.472 0.655 0.78 0.935 0.482 0.501 ...
$ key : chr "SOL#" "LA" "MI" "DO#" ...
$ loudness : num -6.28 -6.89 -5.86 -12.61 -12.32 ...
$ mode : chr "Major" "Major" "Major" "Major" ...
$ speechiness : num 0.0399 0.0409 0.0328 0.028 0.0386 0.405 0.0409 0.0637 0.0588 0.0296 ...
$ acousticness : num 0.164 0.0151 0.712 0.0775 0.321 0.25 0.0471 0.185 0.0111 0.306 ...
$ instrumentalness: num 1.85e-03 5.22e-04 0.00 1.35e-05 7.99e-04 0.00 2.42e-02 0.00 1.57e-06 1.39e-04 ...
$ liveness : num 0.209 0.107 0.0997 0.0618 0.45 0.0691 0.0856 0.129 0.0476 0.209 ...
$ valence : num 0.547 0.35 0.429 0.253 0.114 0.735 0.609 0.348 0.615 0.807 ...
$ tempo : num 121 120.1 89.9 131.3 144.7 ...
$ duration_ms : int 179587 196325 259570 394133 815160 171215 160543 216960 180267 165000 ...
$ time_signature : int 4 4 4 4 3 4 4 4 4 4 ...
$ Followers : int 4173070 3591601 9277942 2387338 15962917 4090589 3591601 83237 10163774 5343890 ...
$ Genres : chr "mellow gold" "rock" "pop" "mellow gold" ...
data <- data[data$msPlayed > 0,]
colnames(data)
[1] "URI" "song" "album" "artist" "release_date" "explicit" "msPlayed"
[8] "X.markets" "popularity" "danceability" "energy" "key" "loudness" "mode"
[15] "speechiness" "acousticness" "instrumentalness" "liveness" "valence" "tempo" "duration_ms"
[22] "time_signature" "Followers" "Genres"
y <- data$msPlayed
y_log <- log(y)
qplot(y, geom="histogram",bins = 30)
qplot(y_log, geom="histogram",bins = 30)
X <- subset(data,select=-msPlayed)
X <- model.matrix(~ X.markets + popularity + danceability + energy + key + loudness + mode + speechiness + acousticness + instrumentalness
+ liveness + valence + tempo + duration_ms + time_signature + artist + release_date + explicit+ release_date:danceability
+ release_date:energy + release_date:loudness + release_date:speechiness + release_date:acousticness
+ release_date:instrumentalness + release_date:liveness + release_date:valence + release_date:tempo + release_date:duration_ms
+ popularity:danceability + popularity:energy + popularity:loudness + popularity:speechiness + popularity:acousticness
+ popularity:instrumentalness + popularity:liveness + popularity:valence + popularity:tempo + popularity:duration_ms
+ danceability:energy + danceability:loudness + danceability:speechiness + danceability:acousticness + danceability:instrumentalness
+ danceability:liveness + danceability:valence + danceability:tempo + danceability:duration_ms + energy:loudness + energy:speechiness
+ energy:acousticness + energy:instrumentalness + energy:liveness + energy:valence + energy:tempo + energy:duration_ms
+ loudness:speechiness + loudness:acousticness + loudness:instrumentalness + loudness:liveness + loudness:valence + loudness:tempo
+ loudness:duration_ms + speechiness:acousticness + speechiness:instrumentalness + speechiness:liveness + speechiness:valence
+ speechiness:tempo + speechiness:duration_ms + acousticness:instrumentalness + acousticness:liveness + acousticness:valence
+ acousticness:tempo + acousticness:duration_ms + instrumentalness:liveness + instrumentalness:valence + instrumentalness:tempo
+ instrumentalness:duration_ms + liveness:valence + liveness:tempo + liveness:duration_ms + valence:tempo + valence:duration_ms
+ tempo:duration_ms, data=X)
dim(X)
[1] 1168 392
dummies <- c()
for (column in colnames(X)){
dummies <- c(dummies,length(unique(X[,column])) == 2)
}
categorical_features <- colnames(X)[dummies]
numerical_features <- colnames(X)[!dummies]
for(i in categorical_features) {
X[,i] <- as.factor(X[,i])
}
cv.mle= kfoldCV.mle(y_log,as.data.frame(X[,-1]),K = 10, seed=1)
r2.mle= cor(y_log,cv.mle$pred)^2
mse.mle = mean((y_log - cv.mle$pred)^2)
n <- length(y_log)
p <- as.integer(mean(cv.mle$num_betas))
bic.mle <- n * log(colSums((y_log-as.matrix(cv.mle$pred))^2)/length(y_log)) + n*(log(2*pi)+1) + log(n)*p
print(paste('Number of Betas:',p))
[1] "Number of Betas: 392"
print(paste('R^2 Score:',round(r2.mle,4)))
[1] "R^2 Score: 0.2909"
print(paste('MSE Score:',round(mse.mle,4)))
[1] "MSE Score: 4.4435"
print(paste('BIC Score:',round(bic.mle,4)))
[1] "BIC Score: 7825.3496"
fit.lasso= cv.glmnet(x=X[,-1], y=y_log, nfolds=10)
fit.lasso
Call: cv.glmnet(x = X[, -1], y = y_log, nfolds = 10)
Measure: Mean-Squared Error
Lambda Index Measure SE Nonzero
min 0.04361 32 3.236 0.1617 170
1se 0.09180 24 3.362 0.1656 75
print(paste('Number of Lambdas evaluated:',length(fit.lasso$lambda)))
[1] "Number of Lambdas evaluated: 100"
plot(fit.lasso)
plot(fit.lasso$glmnet.fit, xvar='lambda')
b.lasso= as.vector(coef(fit.lasso, s='lambda.min'))
print(paste('Lambda:',round(fit.lasso$lambda.min,4)))
[1] "Lambda: 0.0436"
print(paste('B_0:',sum(b.lasso!=0)))
[1] "B_0: 171"
cv.lasso= kfoldCV.lasso(y=y_log,x=X[,-1],K=10,seed=1,criterion="cv")
Starting cross-validation..........
r2.lassocv= cor(y_log,cv.lasso$pred)^2
mse.lassocv = mean((y_log - cv.lasso$pred)^2)
n <- length(y_log)
p <- as.integer(mean(cv.lasso$num_betas))
bic.lassocv <- n * log(colSums((y_log-as.matrix(cv.lasso$pred))^2)/length(y_log)) + n*(log(2*pi)+1) + log(n)*p
print(paste('Average Number of Betas:',p))
[1] "Average Number of Betas: 149"
print(paste('R^2 Score:',round(r2.lassocv,4)))
[1] "R^2 Score: 0.4228"
print(paste('MSE Score:',round(mse.lassocv,4)))
[1] "MSE Score: 3.2853"
print(paste('BIC Score:',round(bic.lassocv,4)))
[1] "BIC Score: 5756.3271"
fit.lassobic <- lasso.bic(y_log, X[,-1],extended = FALSE)
print(paste('Lambda:',fit.lassobic$lambda.opt))
[1] "Lambda: 0.121354773149321"
print(paste('B_0:',sum(fit.lassobic$coef!=0)))
[1] "B_0: 33"
cv.lassobic= kfoldCV.lasso(y=y_log,x=X[,-1],K=10,seed=1,criterion="bic")
Starting cross-validation..........
r2.lassobic= cor(y_log,cv.lassobic$pred)^2
mse.lassobic = mean((y_log - cv.lassobic$pred)^2)
n <- length(y_log)
p <- as.integer(mean(cv.lassobic$num_betas))
bic.lassobic <- n * log(colSums((y_log-as.matrix(cv.lassobic$pred))^2)/length(y_log)) + n*(log(2*pi)+1) + log(n)*p
print(paste('Average Number of Betas:',p))
[1] "Average Number of Betas: 22"
print(paste('R^2 Score:',round(r2.lassobic,4)))
[1] "R^2 Score: 0.38"
print(paste('MSE Score:',round(mse.lassobic,4)))
[1] "MSE Score: 3.7016"
print(paste('BIC Score:',round(bic.lassobic,4)))
[1] "BIC Score: 4998.6683"
fit.bayesreg <- modelSelection(y=y_log,x=X[,-1], priorCoef=zellnerprior(taustd=1),priorVar=igprior(alpha=.01, lambda=.01),
priorDelta=modelbbprior(1,1),family = 'normal')
Greedy searching posterior mode... Done.
Running Gibbs sampler........... Done.
head(postProb(fit.bayesreg),10)
names(fit.bayesreg$postMode[fit.bayesreg$postMode==1])
[1] "loudness" "artistAriel Posen" "artistConociendo Rusia"
[4] "artistEric Clapton" "artistGustavo Cerati" "artistJohn Mayer"
[7] "artistLuis Alberto Spinetta" "artistLuis Miguel" "artistPedro Aznar"
[10] "artistPink Floyd" "artistScorpions" "artistSoda Stereo"
[13] "artistThe Black Keys" "artistTom Misch" "artistTrent Reznor and Atticus Ross"
[16] "release_date" "popularity:duration_ms" "loudness:liveness"
ci.bayesreg <- coef(fit.bayesreg)[-c(1,nrow(coef(fit.bayesreg))),]
ci.bayesreg[,1:3]= round(ci.bayesreg[,1:3], 3)
ci.bayesreg[,4]= round(ci.bayesreg[,4], 4)
head(ci.bayesreg[order(abs(ci.bayesreg[,'margpp']),decreasing = T),],10)
estimate 2.5% 97.5% margpp
artistAriel Posen 3.761 3.205 4.325 1.0000
artistConociendo Rusia 4.021 3.431 4.609 1.0000
artistGustavo Cerati 3.231 2.856 3.557 1.0000
artistJohn Mayer 3.435 3.139 3.715 1.0000
artistSoda Stereo 2.697 2.325 3.032 1.0000
artistTom Misch 2.547 2.185 2.904 1.0000
artistThe Black Keys 2.159 1.615 2.696 0.9999
artistTrent Reznor and Atticus Ross -2.755 -3.507 -1.974 0.9987
release_date 0.026 0.018 0.032 0.9950
artistLuis Alberto Spinetta 2.064 1.390 2.716 0.9781
#cv_bms <- kfoldCV.bms(y_log,X[,-1],seed=10,K = 10)
r2.bms <- cor(y_log,cv_bms$pred)^2
mse.bms = mean((y_log - cv_bms$pred)^2)
n <- length(y_log)
p <- as.integer(mean(cv_bms$num_betas))
bic.bms <- n * log(colSums((y_log-as.matrix(cv_bms$pred))^2)/length(y_log)) + n*(log(2*pi)+1) + log(n)*p
print(paste('Average Number of Betas:',p))
[1] "Average Number of Betas: 131"
print(paste('R^2 Score:',round(r2.bms,4)))
[1] "R^2 Score: 0.4169"
print(paste('MSE Score:',round(mse.bms,4)))
[1] "MSE Score: 3.3035"
print(paste('BIC Score:',round(bic.bms,4)))
[1] "BIC Score: 5635.6452"
len_bayes <- length(ci.bayesreg[,1][ci.bayesreg[,1] != 0])
len_lasso <- sum(fit.lassobic$coef!=0)
table_n <- matrix(data=list(len_bayes,round(r2.bms,4),len_lasso,round(r2.lassobic,4)),ncol=2)
colnames(table_n) <- c('BMA','LASSO-BIC')
rownames(table_n) <- c('Nº of Selected Variables','CV-R2')
table_n
BMA LASSO-BIC
Nº of Selected Variables 72 33
CV-R2 0.4169 0.38
bayes_sel <- names(ci.bayesreg[,1][ci.bayesreg[,1] != 0])
lasso_sel <- names(fit.lassobic$coef[fit.lassobic$coef != 0])[-1]
lasso_dif <- lasso_sel[!lasso_sel %in% bayes_sel]
bayes_dif <- bayes_sel[!bayes_sel %in% lasso_sel]
mat <- cbind(ci.bayesreg,NA)
colnames(mat) <- c('estimate','2.5%','97.5%','margpp','color')
mat[bayes_sel,'color'] <- 4
mat[mat[,1] == 0,'color'] <- 8
mat[lasso_dif,'color'] <- 2
lasso_mat <- cbind(fit.lassobic$coef,NA)
colnames(lasso_mat) <- c('estimate','color')
lasso_mat[lasso_sel,'color'] <- 2
lasso_mat[lasso_mat[,'estimate'] == 0,'color'] <- 8
lasso_mat[bayes_dif,'color'] <- 4
plot(NA, ylim=c(-6,6), xlim=c(0,nrow(mat)), ylab='Coefficient Estimate', xlab='Coefficient Index', main='Bayesian Model Averaging')
cols= mat[,'color']
points(1:nrow(mat), mat[, 1], pch = 16,col=cols)
plot(NA, ylim=c(-6,6), xlim=c(0,nrow(lasso_mat)),ylab='Coefficient Estimate', xlab='Coefficient Index', main='LASSO-BIC')
cols= lasso_mat[,'color']
points(1:nrow(lasso_mat), lasso_mat[,'estimate'], pch = 16,col=cols)
ci.bayesreg[order(abs(ci.bayesreg[,'estimate']),decreasing = T),][bayes_sel,]
estimate 2.5% 97.5% margpp
loudness -0.031 -0.095 0.000 0.3248
liveness -0.003 0.000 0.000 0.0727
artist92914 0.005 0.000 0.000 0.0151
artistAce of Base -0.005 0.000 0.000 0.0164
artistAerosmith -0.001 0.000 0.000 0.0062
artistAir Supply -0.009 0.000 0.000 0.0331
artistAlicia Keys -0.765 -3.436 0.000 0.3669
artistAriel Posen 3.761 3.205 4.325 1.0000
artistBad Bunny -0.004 0.000 0.000 0.0167
artistBajofondo 0.002 0.000 0.000 0.0145
artistBob Marley & The Wailers -0.001 0.000 0.000 0.0176
artistBryan Adams -0.281 -5.354 0.000 0.1345
artistCallejeros -0.001 0.000 0.000 0.0057
artistCamouflage -0.003 0.000 0.000 0.0151
artistCardi B -0.001 0.000 0.000 0.0080
artistClean Bandit -0.313 -5.515 0.000 0.1543
artistConociendo Rusia 4.021 3.431 4.609 1.0000
artistCulture Club -0.001 0.000 0.000 0.0073
artistDavid Bowie -0.001 0.000 0.000 0.0082
artistDavid Gilmour 0.003 0.000 0.000 0.0273
artistDavid Lee Roth -0.001 0.000 0.000 0.0056
artistDire Straits 0.001 0.000 0.000 0.0069
artistDolly and the Dinosaur 0.002 0.000 0.000 0.0091
artistDuke Ellington 0.003 0.000 0.000 0.0163
artistEric Clapton 1.128 0.000 1.960 0.7096
artistFKJ 0.002 0.000 0.000 0.0130
artistGene Loves Jezebel -0.003 0.000 0.000 0.0078
artistGreta Van Fleet 0.018 0.000 0.000 0.0689
artistGrupo Play -0.002 0.000 0.000 0.0101
artistGustavo Cerati 3.231 2.856 3.557 1.0000
artistHiatus Kaiyote 0.005 0.000 0.000 0.0134
artistJamiroquai -0.157 -3.512 0.000 0.1467
artistJohn Mayer 3.435 3.139 3.715 1.0000
artistJordan Rakei 0.043 0.000 1.033 0.0903
artistJoss Stone -0.014 0.000 0.000 0.0255
artistLucky Daye -0.002 0.000 0.000 0.0091
artistLuis Alberto Spinetta 2.064 1.390 2.716 0.9781
artistLuis Miguel -1.723 -2.653 0.000 0.8483
artistMaroon 5 -0.161 -4.181 0.000 0.0997
artistMegan Thee Stallion -0.001 0.000 0.000 0.0111
artistMoby -0.001 0.000 0.000 0.0055
artistMustafunk 0.002 0.000 0.000 0.0170
artistNatalia Lafourcade -0.001 0.000 0.000 0.0140
artistNorah Jones -0.002 0.000 0.000 0.0187
artistPedro Aznar 1.885 0.000 3.684 0.6073
artistPet Shop Boys 0.001 0.000 0.000 0.0126
artistPink Floyd 1.149 0.000 1.823 0.7866
artistRed Hot Chili Peppers 0.006 0.000 0.000 0.0419
artistReik -0.622 -6.466 0.000 0.2136
artistRod Stewart -0.001 0.000 0.000 0.0058
artistSam Ock -0.004 0.000 0.000 0.0251
artistScorpions -5.256 -7.107 -3.227 0.9393
artistShakira 0.005 0.000 0.000 0.0271
artistSlash 0.004 0.000 0.000 0.0124
artistSoda Stereo 2.697 2.325 3.032 1.0000
artistStevie Wonder -0.001 0.000 0.000 0.0144
artistSystem Of A Down -0.001 0.000 0.000 0.0085
artistTaylor Swift -0.003 0.000 0.000 0.0131
artistTerrace Martin -0.070 0.000 0.000 0.0784
artistThe Black Keys 2.159 1.615 2.696 0.9999
artistThe Weeknd -0.013 0.000 0.000 0.0278
artistTom Misch 2.547 2.185 2.904 1.0000
artistTravis Tritt -0.001 0.000 0.000 0.0055
artistTrent Reznor and Atticus Ross -2.755 -3.507 -1.974 0.9987
release_date 0.026 0.018 0.032 0.9950
explicitTrue -0.211 -1.280 0.000 0.2527
danceability:liveness -0.001 0.000 0.000 0.0136
energy:speechiness -0.001 0.000 0.000 0.0101
energy:liveness -0.001 0.000 0.000 0.0081
loudness:liveness 0.093 0.000 0.145 0.6086
acousticness:liveness -0.032 0.000 0.000 0.0573
instrumentalness:liveness -0.001 0.000 0.000 0.0039
library(readr)
library(tidyverse)
library(quanteda) # quantitative analysis of textual data (https://quanteda.io/articles/quickstart.html)
library(quanteda.textplots) # complementary to quanteda, for visualization
library(cld3) # for language detection
library(lda) # implementation of Latent Dirichlet Allocation
library(servr) # will be used for visualization
# library(topicmodels) # alternative to lda, several topic models included
library(stm) # for structural topic modeling
library(topicmodels)
library(LDAvis)
library(knitr)
library(wordcloud)
library(RColorBrewer)
library(sjmisc)
lyrics <- read.csv2(file.path(PATH,'/Proyecto Stats Data/Sergio/lyrics_concat.csv'), sep=',',header = TRUE)
lyrics
lyricsCorpus <- corpus(lyrics, text_field = 'Lyrics')
languages <- detect_language(lyricsCorpus)
table(languages)
languages
ca en es fr gl hi-Latn ig it ja ko nl pl pt ru-Latn tr uk
9 1083 591 8 1 1 1 3 2 1 2 9 5 1 1 1
lyricsCorpus <- subset(lyricsCorpus, languages == "en")
ntokens_corpus <- ntoken(lyricsCorpus)
data.frame(ntokens_corpus) %>% ggplot(aes(ntokens_corpus)) + geom_histogram(binwidth=10) + xlab('Number of tokens')
lyrics_length20_600 <- names(ntokens_corpus[(ntokens_corpus>=20) & (ntokens_corpus<=600)])
lyricsCorpus_filtered <- lyricsCorpus[names(lyricsCorpus) %in% lyrics_length20_600]
ntokens_corpus <- ntoken(lyricsCorpus_filtered)
data.frame(ntokens_corpus) %>% ggplot(aes(ntokens_corpus)) + geom_histogram(binwidth=10) + xlab('Number of tokens')
custom_list_stopwords <- c(stopwords("en"),'url','copy','embed','urlcopyembedcopy','know','yeah','oh','ooh','verse','dit',
'la','chorus','thunder','s','m','embedshare','n','now','go','get','just','gonna',"ain't",'can','got',
'ah','one','see','pre','ma','untz','like','na','hey','want','wanna','boom','em','baby','let','give','cause',
'come','take','say','said','every','eh','aaahhh','deeya','badeeya','ahh','doo','al','nas','aah','eheu',
'bum','ohohohoh','dum','yoy','animalsmals','aghosts','ding')
dfm_lyricsCorpus<- tokens(lyricsCorpus_filtered, remove_punct=TRUE, remove_numbers = TRUE, remove_symbols = TRUE) %>%
tokens_remove(custom_list_stopwords) %>% dfm() %>% dfm_tolower() %>%
dfm_trim(min_termfreq = 2,docfreq_type = "prop")
dfm_lyricsCorpus
Document-feature matrix of: 1,030 documents, 5,383 features (99.10% sparse) and 25 docvars.
features
docs leaving hoping need ran love rolling stone way leave road
text1 1 1 1 1 1 1 1 1 1 4
text2 0 0 0 0 0 0 0 12 0 0
text3 0 0 0 0 3 0 0 3 0 0
text5 0 0 0 0 3 0 0 0 0 0
text7 0 0 0 0 0 0 0 0 0 0
text8 0 0 0 0 0 0 0 2 0 0
[ reached max_ndoc ... 1,024 more documents, reached max_nfeat ... 5,373 more features ]
textplot_wordcloud(dfm_lyricsCorpus, random_order = FALSE, rotation = 0.25,
color = RColorBrewer::brewer.pal(8, "Dark2"),max_words =200,max_size = 4)
fit_date <- stm(dfm_lyricsCorpus,prevalence = ~release_date, seed=123, max.em.its = 50, K=4)
Beginning Spectral Initialization
Calculating the gram matrix...
Finding anchor words...
....
Recovering initialization...
.....................................................
Initialization complete.
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 1 (approx. per word bound = -7.291)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 2 (approx. per word bound = -7.249, relative change = 5.720e-03)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 3 (approx. per word bound = -7.243, relative change = 8.820e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 4 (approx. per word bound = -7.238, relative change = 6.173e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 5 (approx. per word bound = -7.234, relative change = 5.808e-04)
Topic 1: life, girl, dance, long, things
Topic 2: never, make, back, world, need
Topic 3: time, night, right, think, good
Topic 4: love, way, feel, away, heart
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 6 (approx. per word bound = -7.230, relative change = 5.424e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 7 (approx. per word bound = -7.227, relative change = 5.111e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 8 (approx. per word bound = -7.223, relative change = 5.080e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 9 (approx. per word bound = -7.219, relative change = 5.052e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 10 (approx. per word bound = -7.216, relative change = 4.987e-04)
Topic 1: life, girl, dance, long, things
Topic 2: never, make, back, need, world
Topic 3: time, night, right, think, good
Topic 4: love, feel, way, away, heart
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 11 (approx. per word bound = -7.212, relative change = 5.184e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 12 (approx. per word bound = -7.208, relative change = 5.437e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 13 (approx. per word bound = -7.204, relative change = 5.626e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 14 (approx. per word bound = -7.200, relative change = 5.568e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 15 (approx. per word bound = -7.196, relative change = 5.519e-04)
Topic 1: life, girl, dance, long, things
Topic 2: never, make, back, need, world
Topic 3: time, night, think, right, good
Topic 4: love, feel, way, away, heart
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 16 (approx. per word bound = -7.192, relative change = 5.496e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 17 (approx. per word bound = -7.188, relative change = 5.140e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 18 (approx. per word bound = -7.185, relative change = 5.032e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 19 (approx. per word bound = -7.181, relative change = 5.187e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 20 (approx. per word bound = -7.177, relative change = 5.048e-04)
Topic 1: life, girl, dance, long, light
Topic 2: never, make, back, need, world
Topic 3: time, night, think, right, good
Topic 4: love, feel, way, away, heart
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 21 (approx. per word bound = -7.174, relative change = 4.668e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 22 (approx. per word bound = -7.171, relative change = 4.544e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 23 (approx. per word bound = -7.167, relative change = 4.861e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 24 (approx. per word bound = -7.164, relative change = 4.880e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 25 (approx. per word bound = -7.160, relative change = 4.786e-04)
Topic 1: life, girl, dance, long, light
Topic 2: never, make, back, need, world
Topic 3: time, night, think, good, right
Topic 4: love, feel, way, away, heart
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 26 (approx. per word bound = -7.157, relative change = 4.767e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 27 (approx. per word bound = -7.154, relative change = 4.626e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 28 (approx. per word bound = -7.150, relative change = 4.653e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 29 (approx. per word bound = -7.147, relative change = 4.877e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 30 (approx. per word bound = -7.143, relative change = 5.079e-04)
Topic 1: life, girl, dance, long, light
Topic 2: never, make, back, need, world
Topic 3: time, night, think, good, hold
Topic 4: love, feel, away, way, heart
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 31 (approx. per word bound = -7.140, relative change = 5.051e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 32 (approx. per word bound = -7.136, relative change = 4.861e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 33 (approx. per word bound = -7.133, relative change = 4.799e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 34 (approx. per word bound = -7.129, relative change = 5.088e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 35 (approx. per word bound = -7.125, relative change = 5.293e-04)
Topic 1: life, girl, dance, light, long
Topic 2: never, make, need, back, keep
Topic 3: time, think, night, good, hold
Topic 4: love, feel, away, heart, way
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 36 (approx. per word bound = -7.122, relative change = 4.967e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 37 (approx. per word bound = -7.118, relative change = 4.544e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 38 (approx. per word bound = -7.115, relative change = 4.464e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 39 (approx. per word bound = -7.112, relative change = 4.574e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 40 (approx. per word bound = -7.108, relative change = 4.994e-04)
Topic 1: life, girl, dance, light, stop
Topic 2: never, make, need, back, keep
Topic 3: time, think, good, night, hold
Topic 4: love, feel, away, heart, way
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 41 (approx. per word bound = -7.105, relative change = 4.405e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 42 (approx. per word bound = -7.103, relative change = 3.982e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 43 (approx. per word bound = -7.100, relative change = 3.928e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 44 (approx. per word bound = -7.097, relative change = 3.737e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 45 (approx. per word bound = -7.095, relative change = 3.417e-04)
Topic 1: life, girl, dance, stop, light
Topic 2: never, make, need, back, keep
Topic 3: time, think, good, hold, mind
Topic 4: love, feel, away, heart, day
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 46 (approx. per word bound = -7.092, relative change = 3.293e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 47 (approx. per word bound = -7.090, relative change = 3.320e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 48 (approx. per word bound = -7.088, relative change = 3.281e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Completing Iteration 49 (approx. per word bound = -7.085, relative change = 3.215e-04)
.......................................................................................................
Completed E-Step (0 seconds).
Completed M-Step.
Model Terminated Before Convergence Reached
plot(fit_date)
labelTopics(fit_date)
Topic 1 Top Words:
Highest Prob: life, girl, dance, stop, light, fire, put
FREX: rise, bun, stayin, wonderland, cinema, protect, boogie
Lift: a'nothing, aalright, abused, akon, alabamy, alangdalangdalang, answer8embedshare
Score: dance, bun, rise, worry, ha, burn, girl
Topic 2 Top Words:
Highest Prob: never, make, need, world, keep, back, leave
FREX: mum, whistle, groove, rocks, sending, bedum, sos
Lift: advantage, awaken, brightly, colored, defeated, doorway, excite
Score: need, mum, never, nothing, leave, tainted, whistle
Topic 3 Top Words:
Highest Prob: time, think, good, mind, hold, home, back
FREX: hallelujah, ella, dada, problem, mercy, geronimo, mou
Lift: accept, amen, arch, articles, ayayay, bit's, bombs
Score: dada, hallelujah, time, think, good, shine, ella
Topic 4 Top Words:
Highest Prob: love, feel, away, heart, day, way, always
FREX: halo, built, hakuna, matata, gently, diablo, el
Lift: align, amor, axis, baow, barricade, blessing, bon
Score: love, away, always, day, halo, heart, really
topics_df = data.frame('Topics'=c('Life','Journey','Nostalgia','Love'),'Text 1' = rep('',4),'Text 2' = rep('',4),'Text 3' = rep('',4))
rownames(topics_df) <- 1:4
for (topic in 1:4){
quotes <- lyricsCorpus_filtered[order(fit_date$theta[,topic],decreasing = T)[1:3]]
topics_df[topic,c('Text.1','Text.2','Text.3')] <- convert(quotes,to='data.frame')[,'text']
#plotQuote(quotes, width = 100, text.cex = 1 ,main = paste("Topic ",topic))
}
topics_df
estm_eff_date <- estimateEffect(1:4 ~ release_date, fit_date, meta = docvars(lyricsCorpus_filtered))
summary(estm_eff_date)
Call:
estimateEffect(formula = 1:4 ~ release_date, stmobj = fit_date,
metadata = docvars(lyricsCorpus_filtered))
Topic 1:
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.4639840 0.8644234 1.694 0.0906 .
release_date -0.0006522 0.0004309 -1.514 0.1304
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Topic 2:
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.6731735 0.8984976 -1.862 0.0629 .
release_date 0.0009559 0.0004479 2.134 0.0331 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Topic 3:
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.6789066 0.8709166 0.780 0.436
release_date -0.0001977 0.0004339 -0.456 0.649
Topic 4:
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.149e-01 9.446e-01 0.545 0.586
release_date -9.825e-05 4.709e-04 -0.209 0.835
plot(estm_eff_date,'release_date',method='continuous',verbose.labels = F,ci.level=F)